/*
 * TOP SECRET Copyright 2006-2015 Transsion.com All right reserved. This software is the confidential and proprietary
 * information of Transsion.com ("Confidential Information"). You shall not disclose such Confidential Information and
 * shall use it only in accordance with the terms of the license agreement you entered into with Transsion.com.
 */
package com.yunji.framework_template.biz.crawler.pk;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.Resource;

import org.springframework.stereotype.Service;

import com.yunji.framework_template.biz.crawler.ContentImageHandler;
import com.yunji.framework_template.biz.crawler.NewsCrawler;
import com.yunji.framework_template.biz.crawler.SourceType;
import com.yunji.framework_template.common.enumeration.CountryCode;
import com.yunji.framework_template.common.enumeration.NewsType;
import com.yunji.framework_template.common.util.NumberUtil;

/**
 * ClassName:NationNewsCrawler <br/>
 * Date: 2018年12月18日 下午3:31:24 <br/>
 * 
 * @author fenglibin1982@163.com
 * @Blog http://blog.csdn.net/fenglibing
 * @version
 * @see
 */
@Service
public class JangNewsCrawler extends NewsCrawler {

    @Resource
    private JangNewsImageHandler jangNewsImageHandler;

    @Override
    public boolean isOkUrl(String url) {
        // 通过判断url的path中是否包括年月来判断其是否为新闻详情页
        URI uri;
        try {
            if (url == null || url.trim().length() == 0) {
                return false;
            }
            if (url.startsWith("mailto") || url.toLowerCase().startsWith("javascript")) {
                return false;
            }
            uri = new URI(url);
            String host = uri.getHost();
            if (host.indexOf("jang.com.pk") < 0) {
                return false;
            }
            String path = uri.getPath();
            String[] pathArr = path.split("/");
            if (pathArr.length == 3 && (pathArr[2].split("-").length >= 2 || NumberUtil.isNumber(pathArr[2]))) {
                return true;
            } else if (path.startsWith("/events") && pathArr.length == 3 && NumberUtil.isNumber(pathArr[2])) {
                return true;
            }
        } catch (URISyntaxException e) {
        }
        return false;
    }

    @Override
    public ContentImageHandler getContentImageHandler() {
        return jangNewsImageHandler;
    }

    @Override
    public List<SourceType> getSourceTypeList() {
        List<SourceType> sourceTypeList = new ArrayList<SourceType>();
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/urdu-news").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/national").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/world").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/business").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/sports").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/entertainment").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/health-science").newsType(NewsType.HOUSE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/amazing").newsType(NewsType.ASKANDANSWER).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/special-reports").newsType(NewsType.HEALTH).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/misc-news").newsType(NewsType.MILITARY).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper").newsType(NewsType.CAR).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/topstory").newsType(NewsType.TECH).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/editorial").newsType(NewsType.FINANCE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/sports").newsType(NewsType.NEWS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/europe").newsType(NewsType.BUSINESS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/world").newsType(NewsType.ENTERTAINMENT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/national").newsType(NewsType.BLOG).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/karachi").newsType(NewsType.MAN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/entertainment").newsType(NewsType.WOMAN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/business").newsType(NewsType.KIDS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/education-women-health").newsType(NewsType.FOOD).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/sindh").newsType(NewsType.POLITICS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/letters").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/today-newspaper/editorial").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/columns-archive").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/mera-shahar").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/mera-shahar/karachi").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/mera-shahar/islamabad").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/mera-shahar/lahore").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/mera-shahar/peshawar").newsType(NewsType.HOUSE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/mera-shahar/quetta").newsType(NewsType.ASKANDANSWER).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/mera-shahar/multan").newsType(NewsType.HEALTH).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report").newsType(NewsType.MILITARY).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report/amazing").newsType(NewsType.CAR).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report/entertainment").newsType(NewsType.TECH).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report/technology").newsType(NewsType.FINANCE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report/sports").newsType(NewsType.NEWS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report/siaasat").newsType(NewsType.BUSINESS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report/jurm-o-saza").newsType(NewsType.ENTERTAINMENT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/khaas-report/duniya-bhar-say").newsType(NewsType.BLOG).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/latest-news/video-reports").newsType(NewsType.MAN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/photo-albums").newsType(NewsType.WOMAN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine").newsType(NewsType.KIDS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/economy").newsType(NewsType.FOOD).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/nisf-sae-zyada").newsType(NewsType.POLITICS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/maholiyat").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/beauty-and-skin-care").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/ghar-pyara-ghar").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/fashion-and-showbiz").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/khana-khazana").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/women").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/tameeraat").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/taleem").newsType(NewsType.HOUSE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/zaraat").newsType(NewsType.ASKANDANSWER).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/commerce").newsType(NewsType.HEALTH).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/financial-times").newsType(NewsType.MILITARY).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/jang-fashion").newsType(NewsType.CAR).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/mkrf-society").newsType(NewsType.TECH).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/jang-forum").newsType(NewsType.FINANCE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/special-edition").newsType(NewsType.NEWS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/jurm-o-saza").newsType(NewsType.BUSINESS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/nojawan").newsType(NewsType.ENTERTAINMENT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/bachon-ka-jang").newsType(NewsType.BLOG).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/siaasi-tajziye-aur-tabsiry").newsType(NewsType.MAN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/health").newsType(NewsType.WOMAN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/the-telegraph").newsType(NewsType.KIDS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/home-and-decor").newsType(NewsType.FOOD).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/wadi-mehran").newsType(NewsType.POLITICS).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/adab").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/almi-manzar-nama").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/sunday-magazine").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/midweek-magazine").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/biladi").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/science-and-technology").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/fun-o-funkar").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/iqra").newsType(NewsType.HOUSE).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/category/magzine/sports").newsType(NewsType.ASKANDANSWER).build());
        sourceTypeList.add(SourceType.builder().url("https://jang.com.pk/events/").newsType(NewsType.HEALTH).build());

        return sourceTypeList;
    }

    @Override
    public Set<String> getCountryCodeSet() {
        Set<String> countryCodeSet = new HashSet<String>();
        countryCodeSet.add(CountryCode.PK.name());
        return countryCodeSet;
    }
}
